{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "collapsed": true
   },
   "source": [
    "# 使用信息熵寻找最佳划分"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn import datasets\n",
    "\n",
    "iris = datasets.load_iris()\n",
    "# 为了可视化，取后2个特征\n",
    "# 由于只展示效果，不评估模型的泛化能力，所以不做train test split\n",
    "X = iris.data[:,2:]\n",
    "y = iris.target"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 对其使用决策树进行划分"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "DecisionTreeClassifier(class_weight=None, criterion='entropy', max_depth=2,\n            max_features=None, max_leaf_nodes=None,\n            min_impurity_decrease=0.0, min_impurity_split=None,\n            min_samples_leaf=1, min_samples_split=2,\n            min_weight_fraction_leaf=0.0, presort=False, random_state=None,\n            splitter='best')"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.tree.tree import DecisionTreeClassifier\n",
    "\n",
    "dt_clf = DecisionTreeClassifier(max_depth=2, criterion='entropy')\n",
    "dt_clf.fit(X,y)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 绘制决策边界"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/seamonster/MachineLearningClassicAlgorithmEnv/lib/python3.6/site-packages/matplotlib/contour.py:967: UserWarning: The following kwargs were not used by contour: 'linewidth'\n  s)\n"
     ]
    },
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAD8CAYAAACMwORRAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4xLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvAOZPmwAAHBJJREFUeJzt3XuQXPWZ3vHvOxckGEYMWAIJJCMo2LUxWaRlImNry0WtYwKIMi7WJHIcx3ZRq9TGOJBsVSqgFLfYy26lyo7KOHYpQBZ2vbDmYke7iDXesmq5uLiMZLEGiZQVbCKB0ADSSKMbaGbe/NFnZnrOnJ4+ffp0n0s/n6opun/zO6dfidKr1jnPvG3ujoiIlEtX1gWIiEj61NxFREpIzV1EpITU3EVESkjNXUSkhNTcRURKqG5zN7P5Zvaimb1sZq+a2Z0Re+aZ2V+b2S4ze8HMlreiWBERiSfOO/f3gd9390uAFcCVZnZZaM8NwAF3vwD4NvBn6ZYpIiKNqNvcveJw8LQ3+Ar/5NO1wAPB40eBT5uZpValiIg0pCfOJjPrBrYCFwDfdfcXQlvOAXYDuPuYmR0EPgS8GzrPOmAdQN+8eZdeuHhxc9VLQ947+UNZlyAiTdr92rZ33X1RvX2xmru7jwMrzGwA+JGZXezurzRalLtvBDYCrFy+3LesX9/oKaQJD6z4ctYliEiTbl417404+xpKy7j7CLAFuDL0rTeBZQBm1gOcBrzXyLlFRCQ9cdIyi4J37JjZycBngNdC2zYBk28LPw/8zDWRTEQkM3EuyywBHgiuu3cBP3T3vzWzu4Ahd98E3Af8hZntAvYDa1tWsYiI1FW3ubv7PwIrI9Zvq3p8HLg+3dJERCQp/YSqiEgJqbmLiJSQmruISAmpuYuIlJCau4hICam5i4iUkJq7iEgJqbmLiJSQmruISAmpuYuIlJCau4hICam5i4iUkJq7iEgJqbmLiJSQmruISAmpuYuIlJCau4hICam5i4iUkJq7iEgJqbmLiJSQmruISAmpuYuIlJCau4hICam5i4iUkJq7iEgJqbmLiJSQmruISAnVbe5mtszMtpjZDjN71cxuithzuZkdNLPtwddtrSlXRETi6ImxZwz4Y3ffZmb9wFYz+6m77wjte8bdr0m/RBEpkl0jTzI0fA+Hx/Zxas9ZDJ55IxcMXJXoOCDRuSRGc3f3vcDe4PGome0EzgHCzV1EOtyukSd5Zu83GPfjABwee5tn9n4DYM6mHHXcP7x1B2bGhJ9o6FxS0dA1dzNbDqwEXoj49ifM7GUze9LMPpZCbSJSMEPD90w16Enjfpyh4XsaPs4Zm2rsjZxLKuJclgHAzE4FHgNudvdDoW9vA85198NmdjXwY+DCiHOsA9YBLD3jjMRFi0g+HR7b19B63O8n3dvJYr1zN7NeKo39B+7+ePj77n7I3Q8HjzcDvWa2MGLfRncfdPfBhf39TZYuInlzas9ZDa3H/X7SvZ0sTlrGgPuAne7+rRp7Fgf7MLNVwXnfS7NQEcm/wTNvpNvmz1jrtvlTN0cbOc7ooct6Gz6XVMS5LLMa+BLwSzPbHqzdCnwYwN2/D3we+CMzGwOOAWvd3VtQr4jk2OSNzkYTLrWOS3IuqYiTlnkWsDp77gF0l0MkZ5LGEuN67q27eW3kcZwJjC4+MnAdq8++JdFrXDBwVeRxaubJxL6hKiLFkjSWGNdzb93NzpFHp547E1PPV599S9Pnl+Zo/IBISSWNJcb12sisbMWc69Jeau4iJZU0lhiXM9HQurSXmrtISSWNJcZlNdpHrXVpL/1fECmppLHEuD4ycF1D69JeuqEqUlJJY4lxTd40jUrLSPbU3EU6TNzpi1Fr4b8YVp99y6xmHid+GTei2eooZ1J5rauamrtIScWdtPj03jtxd5yxOfclne4YPi5uRLPVUc6k8lpXmK65i5RU3EmLE35iqrHPtS/pdMfwcXEjmq2OciaV17rC1NxFSqoV0xOTTnesXo8b0Wx1lDOpvNYVpuYuUlKtmJ6YdLpj9XrciGaro5xJ5bWuMDV3kZKKO2mxy3qx0O23pBMZ48Qv40Y0Wx3lTCqvdYXphqpISTUyaTHuWtLpjtXHxY1otjrKmVRe6wqzrCbzrly+3LesX5/Ja3eqB1Z8OesSpGSKHmmMkvdab141b6u7D9bbp3fuIpJI0SONUYpUaz265i4iiRQ90hilSLXWo+YuIokUPdIYpUi11qPmLiKJFD3SGKVItdaj5i4iiRQ90hilSLXWoxuqIpJI0SONUYpUaz1q7iKSWK0PtU66Lw+KVOtc1NxFJFI4773s1N9j9+FnU/nhp7xkyfNSRyuouYvILFF5750jj059v5HxwUlH/rZaXupoFd1QFZFZovLeYXHHBycd+dtqeamjVdTcRWSWtHPdSUb+tlpe6mgVNXcRmSXtXHeSkb+tlpc6WkXNXURmicp7h8UdH5x05G+r5aWOVtENVRGZJSrvnVZaJi9Z8rzU0Sp1R/6a2TLgQeAswIGN7r4htMeADcDVwFHgK+6+ba7zauRv+2nkb7m0MqpYdM1EHPMej0xz5O8Y8Mfuvs3M+oGtZvZTd99Rtecq4MLg6+PA94L/ikgLxIkqRsUSo+KLZYr/QXMRxzLFI+tec3f3vZPvwt19FNgJnBPadi3woFc8DwyY2ZLUqxURIF5UMSqWGBVfLFP8D5qLOJYpHtnQDVUzWw6sBF4IfescYHfV8z3M/gsAM1tnZkNmNvTu6GhjlYrIlFZGFYuumYhjmeKRsZu7mZ0KPAbc7O6HkryYu29090F3H1zY35/kFCJCa6OKRddMxLFM8chYzd3Meqk09h+4++MRW94EllU9XxqsiUgLxIkqRsUSo+KLZYr/QXMRxzLFI+veUA2SMPcBO939WzW2bQJuNLOHqdxIPejue9MrU0SqtTKqWHTNRBzLFI+Mk5ZZDXwJ+KWZbQ/WbgU+DODu3wc2U4lB7qIShfxq+qWKSLXwaNpdI0+y+/CzM/bsO7qdI2PDgHNkbJh9R7ez+uxbcjulMSxpXc2M7e2Ykb/u/ixgdfY48LW0ihKRxkRF+P7hrdtxxqf2OBNTccnVZ98y57F5iP/lta6i0PgBkRKIivBVN/Zqr43MvG2W1/hfXusqCjV3kRJoJKrnTMQ6Nuv4X17rKgo1d5ESaCSqZ6E/9nmN/+W1rqJQcxcpgagIn9EdufcjA9fVPTYP8b+81lUUmgopUgK1Inz7jm7ntZHHcSYwuvjIwHUzbqbOdWzWNy3zWldRqLmLFMArv/pDXvxgKxNU/rm96qRLOdh3/qzGvfa3nph17GT2va/nTM46ZUXk+ePE/5576+5Zr3fWKStiNd84kcZae+I087xGObOk5i6Sc6/86g95/oOtYJVE8gRUnp+YnqodFXNMM0r43Ft3z5g6Ofl6O0ceozIJvPb549ShSY7p0zV3kZx7saqxTwk/D1THHNOMEobjk9Nmfh5E1Pnj1KFJjulTcxfJuYn6W6ZUxxzTjBKG45NzCZ8/Th2a5Jg+NXeRnGvkD2l1zDHNKGE4PjmX8Pnj1KFJjulTcxfJuVUnXQrhj8Os8fGY1THHNKOE4fjktJmXh6LOH6cOTXJMn26oiuTcxRf+T4iZlqmOOaYZJZw8b5K0TJw6NMkxfXU/ILtV9AHZ7acPyBYpvjQ/IFtK4qaP/WXWJXS8J97YyYZXnuPto6MsPqWfmy5ezZpzP5rOyXcMwdOb4dABWHA6fOpquKhuD5CCuTnmPjV3kTZ54o2d3LH17zk+XvnQ6r1HR7lj698DNN/gdwzB3/0QxoIPvz50oPIc1OA7lG6oirTJhleem2rsk46Pj7HhleeaP/nTm6cb+6SxE5V16Uhq7iJt8vbR0YbWG3LoQGPrUnpq7iJtsviU/obWG7Lg9MbWpfTU3EXa5KaLVzO/e+ZtrvndPdx08ermT/6pq6Gnd+ZaT29lXTqSbqiKtMnkTdOWpGUmb5oqLSMBNXeRNlpz5Ahrdr813YDPOzJ7U9JI40WDs/flJB7Z0gioRFJzF2mXOHHFNCONOYlHtjQCKjXpmrtIu8SJK6YZacxJPLKlEVCpSc1dpF3ixBXTjDTmJB7Z0gio1KTmLtIuceKKaUYacxKPbGkEVGpScxdplzhxxTQjjTmJR7Y0Aio16YaqSLvEiSumGWnMSTyypRFQqaluczez+4FrgGF3vzji+5cD/xv4dbD0uLvflWaRIrnSTLxwy9/AkUOVx4cOwFOPzT5XXE89Ai8/Dz4B1sUTH/0nbLCxGQ2Uvj42LDubt4/2V9b6+ljT+K+4aWvO/aiaeZvFeef+58A9wINz7HnG3a9JpSKRPGsmXvjd26cb+6QPjle+Js/1xEPQZTA+Pvf5n3oEtv986ukTp8znjiPvcbyrcqV179FR/stLP8HMODExMbWmCGLnqHvN3d2fBva3oRaR/GsmXhhu7FF8Yrqxz3X+l5+f8XTD6QNTjX3qMPepxj5JEcTOkdYN1U+Y2ctm9qSZfazWJjNbZ2ZDZjb07qhiUFJAWcULw+f3mU377Z7u2KdSBLEzpNHctwHnuvslwHeAH9fa6O4b3X3Q3QcX9isGJQWUVbwwfH6b+Ud38Vjo3f4cFEHsDE03d3c/5O6Hg8ebgV4zW9h0ZSJ51Ey8sG9B/T3WBd2hd+FR57/kshlPbzowwvzQJZgeM3pDl2oUQewcTTd3M1tsZhY8XhWc871mzyuSSxcNwpX/Yvqd9ILTK8/jpGW+dufsBn/S/JnnWvMFuGpt/fNfcT2s+OTUO/g1R49zR9+HWHJKPwYsOaWfb/zTf85/Hbxixtodl/4z3UztEHGikA8BlwMLzWwPcDvQC+Du3wc+D/yRmY0Bx4C17u4tq1gkLWlOX4wSiipyyWWVBh9VQ7U9v4bRg5XHowcrz2F2rVdcX/kKrAm+wlrZzDXtMb8sqz68cvly37J+fSav3akGvhj/plvphSONULn8EfddeD2hqOKUFZ+cbshRNVjXrJulketp1ppQeNojVC776F8HrWV9N2x197r/4zV+QDpTqycmhqKKketRNUQ19qj1HHz4taY95puau3SmVkca4zTpZl8r4w+/1rTHfFNzl87U6kij1fijVb3e7Gtl/OHXmvaYb2ru0plaPTExFFWMXI+qIc5fCpCLD7/WtMd801RI6Uytnpg4edM0nJapSrfUrGHPr2cft/S8zKc7hmnaY74pLdNBlJYRKb64aRm9c5dia2b8bpSH/wf8v19NP+9bAEcPz3wXDbPfWUetRb3bhplr538UXt+Zq3fkUg56595BSvfOPe2serixN8sMqv98dXfDhNdO0kAu8uuSb8q5S/mlnVVPs7HDzMYOlVG+czV2yEV+XcpBzV2KK6vxu61W9PolF9TcpbiyGr/bakWvX3JBzV2KK+2s+ocvbL6mapVhqdO6u2vn2CflIL8u5aDmLsXVzPjdKGv/3ewG37dguiFbV2XwV9Wo3TnX1vyrmbVdtbYy0rd6bcUn06tfpIqikFJsccfvRomKUf7OKhh5d+5o4o6h6fhi/2mVyONFgzN/QGlyX5J60453SkdSc5fOFI5RHjoATzwEXTb9AdWHDlT2wHRzjTouvKeRfXHqinOcSIguy0hnqjVudzz0WaThaGLc+GXSmGarRxFLx1Bzl87USNywem/c+GXSmGZZ453Sdmru0pkaiRtW740bv0wa0yxrvFPaTs1dOlOtcbvdoREN4Whi3Phl0phmq0cRS8fQDVXpTLXG7UatVd/IjDsqOOlI4VaPIpaOoeYu6cprjC+qrihxoopx45dJY5rNxDtFAmrukp68xvii6nry4ZkTGvNSq0hKdM1d0pPXGF9UXVETGvNQq0hK1NwlPXmN8SWNPYoUmJq7pCevMb6ksUeRAlNzl/TkNcYXVVfUhMY81CqSEt1QlfTkNcaXNPYoUmB1m7uZ3Q9cAwy7+8UR3zdgA3A1cBT4irtvS7tQKYikMb6nHpn9AdPhKYtx99WKY0bVFTXxUR9gLSUQ57LMnwNXzvH9q4ALg691wPeaL0s6ylOPwPafT6dXfKLy/KlHGt83GXucvDE6GXGMGr8bFnXs9p8nO5dIxuo2d3d/Gtg/x5ZrgQe94nlgwMyWpFWgdICXn4+3HmdfM3HMqGPDFJeUgkjjhuo5wO6q53uCtVnMbJ2ZDZnZ0Lujoym8tJRCOG9eaz3OvmbimHFjkIpLSgG0NS3j7hvdfdDdBxf297fzpSXPan2uaHg9zr5m4phxY5CKS0oBpNHc3wSWVT1fGqyJxHPJZfHW4+xrJo4ZdWyY4pJSEGk0903Av7GKy4CD7r43hfNKp7ji+ugPmA6nYOLsa+ZDs6OO1QdYS0GZu8+9wewh4HJgIbAPuB3oBXD37wdRyHuoJGqOAl9197pxgpXLl/uW9eubKl4aM/DF7vqbRCTXrO+Gre5e9x1G3Zy7u3+hzvcd+FoDtYmISItp/ICISAmpuYuIlJCau4hICam5i4iUkJq7iEgJqbmLiJSQmruISAmpuYuIlJCau4hICam5i4iUkJq7iEgJqbmLiJSQmruISAmpuYuIlJCau4hICam5i4iUkJq7iEgJqbmLiJSQmruISAmpuYuIlFDdD8iW+jaPnsx39vfz9lg3i3vG+foZo1zdfyzrskSkg6m5N2nz6Mnc9c5pHPfKP4L2jvVw1zunAajBi0hmdFmmSd/Z3z/V2Ccd9y6+s78/o4pERNTcm/b2WHdD6yIi7aDm3qTFPeMNrYuItIOae5O+fsYo821ixtp8m+DrZ4xmVJGIiG6oNm3ypqnSMiKSJ7Gau5ldCWwAuoF73f1PQ9//CvDfgDeDpXvc/d4U68y1q/uPqZmLSK7Ube5m1g18F/gMsAd4ycw2ufuO0Na/dvcbW1BjISn7LiJZinPNfRWwy91fd/cPgIeBa1tbVrFNZt/3jvXg2FT2ffPoyVmXJiIdIk5zPwfYXfV8T7AW9gdm9o9m9qiZLUuluoJS9l1EspZWWuZvgOXu/jvAT4EHojaZ2TozGzKzoXdHy5smUfZdRLIWp7m/CVS/E1/K9I1TANz9PXd/P3h6L3Bp1IncfaO7D7r74ML+8r6LVfZdRLIWp7m/BFxoZueZ2UnAWmBT9QYzW1L19LPAzvRKLB5l30Uka3XTMu4+ZmY3Aj+hEoW8391fNbO7gCF33wT8ezP7LDAG7Ae+0sKac0/ZdxHJWqycu7tvBjaH1m6renwLcEu6pbVX3Ojiuj1n8OL786aer5r3Pp877disY9N8TRGRRuknVIk/tne6sdvU2ovvz+Ol4Xl4sLZ3rIfbhwdwnDFqn0+jgkWklTRbhvjRxXBjr7Cpxj7pBDbV2GudT3FJEWklNXfaF12sPp/ikiLSSmrutC+6WH0+xSVFpJXU3IkfXVw1733AQ0c7Flrrxelh7vMpLikiraTmTuUG5m2LDrKkZwzDWdIzxm2LDs66sblx6f6qBl/5WjXvfb555siMY+88c4S7zpz7fHFfU0QkCaVlAr841su+sW4c2DfWzS+O9fLjgydHxh537++Zii9+7rRjkSN/4wwJ06hgEWkVNXfgm8MLeGS0j8kkzAQEz6Fe7DEqvqiYo4hkTZdlgMeqGvs0i1wLxx6j4ouKOYpI1tTcIXTrs3Hh+KJijiKSNTV3mv9NCMcXFXMUkaypuQN/0H+EqIhjnNhjVHxRMUcRyZqaO7D+zENc33+ErqChd+Fc338kVuwxKr6omKOIZK10aZm4kxa/ObyAx0b7mKDyN1wfE1PX3ieArUdP4o3xmb89W98/iZ3DvYxODQTr5u7hBXz7nX7e8enr6YtsnP+wqP67dE2FFJFWKVVzjxtBjIo+Vhr2dBLm9fHe4NH02jgwGkrRjNLFqM/c9453c+vwwNSapkKKSLuV6rJM3Ahi7ehj+Hl6a5oKKSLtVKrmHjeC2Gz0MSlNhRSRdilVc48bQczqF62pkCLSLqVq7nEjiLWjj+Hn6a1pKqSItFOpmnvcCGJU9LGfCapjj+d3n6C76jk43RH7+plgkY3PWFtk4/xJncik4pIi0krmHn7X2R4rly/3LevXZ/LanWrgi7qeL1J01nfDVncfrLevVFHIWpLmyaOOu/e9vqqYJJzffYLHl7/byvJFRBpWqssyUSbz5HvHenBsKk9eb9561HG3Dg8Ejd2mvl4f7+W63yxsxy9FRCS20jf3pHnyqONqZdqr38mLiORB6Zt70jy58uYiUmSlb+5J8+TKm4tIkZW+uSfNk0cdVyvTfn73iabrFBFJU+mbe9I8edRxf3LmSNDIZ+bhlZYRkbyJFYU0syuBDUA3cK+7/2no+/OAB4FLgfeAf+nuv0m31OSu7j+W6IeDoo7TDxmJSBHUfeduZt3Ad4GrgIuAL5jZRaFtNwAH3P0C4NvAn6VdqIiIxBfnsswqYJe7v+7uHwAPA9eG9lwLPBA8fhT4tJmFM4MiItImcS7LnAPsrnq+B/h4rT3uPmZmB4EPATMuRpvZOmBd8PTw6evW/Z8kRce0MPz6BZN+/evqb0mRfv+zU+TaQfXXc26cTW0dP+DuG4GN7XgtMxuKM38hr1R/topcf5FrB9WfljiXZd4EllU9XxqsRe4xsx7gNCo3VkVEJANxmvtLwIVmdp6ZnQSsBTaF9mwCvhw8/jzwM89q3KSIiNS/LBNcQ78R+AmVKOT97v6qmd0FDLn7JuA+4C/MbBewn8pfAFlry+WfFlL92Spy/UWuHVR/KjKb5y4iIq1T+p9QFRHpRGruIiIlVLrmbmb3m9mwmb2SdS1JmNkyM9tiZjvM7FUzuynrmuIys/lm9qKZvRzUfmfWNSVhZt1m9gsz+9usa2mUmf3GzH5pZtvNbCjrehplZgNm9qiZvWZmO83sE1nXFJeZ/Xbw+z75dcjMbs6snrJdczezTwGHgQfd/eKs62mUmS0Blrj7NjPrB7YCn3P3HRmXVlfwU8l97n7YzHqBZ4Gb3P35jEtriJn9R2AQWODu12RdTyPM7DfAoLsX8oeAzOwB4Bl3vzdI553i7iNZ19WoYGzLm8DH3f2NLGoo3Tt3d3+aSmKnkNx9r7tvCx6PAjup/ARw7nnF4eBpb/BVqHcPZrYUWAPcm3UtncbMTgM+RSV9h7t/UMTGHvg08H+zauxQwuZeJma2HFgJvJBtJfEFlzS2A8PAT929MLUH/jvwn4DwMP+icOApM9sajPsokvOAd4D/FVwWu9fM+rIuKqG1wENZFqDmnlNmdirwGHCzux/Kup643H3c3VdQ+UnmVWZWmEtjZnYNMOzuW7OupQm/5+6/S2WK69eCy5RF0QP8LvA9d18JHAH+c7YlNS64nPRZ4JEs61Bzz6HgevVjwA/c/fGs60ki+Of0FuDKrGtpwGrgs8F164eB3zezv8y2pMa4+5vBf4eBH1GZ6loUe4A9Vf/ae5RKsy+aq4Bt7r4vyyLU3HMmuCl5H7DT3b+VdT2NMLNFZjYQPD4Z+AzwWrZVxefut7j7UndfTuWf1T9z93+dcVmxmVlfcBOe4HLGFUBhUmPu/jaw28x+O1j6NJD7IEGEL5DxJRlo81TIdjCzh4DLgYVmtge43d3vy7aqhqwGvgT8Mrh2DXCru2/OsKa4lgAPBEmBLuCH7l64OGGBnQX8KPgohR7gr9z977ItqWFfB34QXNp4HfhqxvU0JPhL9TPAv828lrJFIUVERJdlRERKSc1dRKSE1NxFREpIzV1EpITU3EVESkjNXUSkhNTcRURK6P8DQvphtANyYsAAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x10927ed68>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "from playML.plot_utils import plot_decision_boundary\n",
    "\n",
    "plot_decision_boundary(dt_clf, axis=[0.5, 7.5, 0, 3])\n",
    "plt.scatter(X[y==0,0],X[y==0,1])\n",
    "plt.scatter(X[y==1,0],X[y==1,1])\n",
    "plt.scatter(X[y==2,0],X[y==2,1])\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 模拟使用信息熵进行划分"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def split(X, y, d, value):\n",
    "    \"\"\"\n",
    "    这个函数不是帮我们寻找d和value，而是在已知d和value的情况下，帮我们划分数据集\n",
    "    :param X: \n",
    "    :param y: \n",
    "    :param d:   划分的维度 \n",
    "    :param value:  划分的维度的阈值\n",
    "    :return: \n",
    "    \"\"\"\n",
    "    index_a = X[:,d]<=value\n",
    "    index_b = X[:,d]>value\n",
    "    \n",
    "    return X[index_a],X[index_b],y[index_a],y[index_b]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 计算信息熵\n",
    "from collections import Counter\n",
    "\n",
    "# y里边有每个样本的分类信息，所以足以让我们计算信息熵\n",
    "def entropy(y):\n",
    "    # Counter返回一个字典，key为y的类别，value为该类别出现次数\n",
    "    counter = Counter(y)\n",
    "    # 信息熵\n",
    "    res = 0.0\n",
    "    for num in counter.values():\n",
    "        # p就是该类别出现概率：出现次数/总数\n",
    "        p = num/len(y)\n",
    "        res += - p * np.log(p)\n",
    "    return res"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "# 寻找划分的维度和阈值\n",
    "def try_split(X, y):\n",
    "    # 我们的目的是寻找一个信息熵最小的划分，那么我们先设置初始信息熵为正无穷\n",
    "    best_entropy = float('inf')\n",
    "    best_d, best_value = -1, -1\n",
    "\n",
    "    for d in range(X.shape[1]):\n",
    "        # 我们对每一维度上所有的值进行排序，每个可能选择的阈值就是两个相邻的值的均值\n",
    "        sorted_index = np.argsort(X[:, d])\n",
    "        for i in range(1, X.shape[0]):\n",
    "            # 注意前后两个数有可能刚好相等，那就会无法分开，所以应该跳过\n",
    "            if (X[sorted_index[i - 1], d] != X[sorted_index[i], d]):\n",
    "                v = (X[sorted_index[i - 1], d] + X[sorted_index[i], d]) / 2.\n",
    "                X_l, X_r, y_l, y_r = split(X, y, d, v)\n",
    "                # 信息熵\n",
    "                e = entropy(y_l) + entropy(y_r)\n",
    "                \n",
    "                if e < best_entropy:\n",
    "                    best_entropy = e\n",
    "                    best_d = d\n",
    "                    best_value = v\n",
    "    return best_entropy, best_d, best_value"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 我们来看看，第一次划分，在什么维度上，取什么值"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "best_entropy =  0.69314718056\nbest_d =  0\nbest_value =  2.45\n"
     ]
    }
   ],
   "source": [
    "best_entropy, best_d, best_value = try_split(X, y)\n",
    "print('best_entropy = ', best_entropy)\n",
    "print('best_d = ', best_d)\n",
    "print('best_value = ', best_value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "X1_l,X1_r,y1_l,y1_r = split(X,y,best_d,best_value)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.0"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "entropy(y1_l)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "可以看到y1_l的分类都是0，所以没有不确定性，信息熵为0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Counter({0: 50})"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "Counter(y1_l)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.69314718055994529"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "entropy(y1_r)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "y1_l已经没必要再划分，我们只需要对y1_r继续划分"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "best_entropy2 =  0.413227889936\nbest_d2 =  1\nbest_value2 =  1.75\n"
     ]
    }
   ],
   "source": [
    "best_entropy2, best_d2, best_value2 =try_split(X1_r, y1_r)\n",
    "print('best_entropy2 = ', best_entropy2)\n",
    "print('best_d2 = ', best_d2)\n",
    "print('best_value2 = ', best_value2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "X2_l,X2_r,y2_l,y2_r = split(X1_r,y1_r,best_d2,best_value2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(0.30849545083110386, 0.10473243910508653)"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "entropy(y2_l), entropy(y2_r)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "可以看到信息熵都没有降为0，还可以继续划分  \n",
    "不过我们现在划分了两次，对应的就是那个depth/max_depth"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 练习：构建决策树"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
